William Loving (wfl9zy) James Sweat (jes9hd)
- Explore and visualize the broader Software Engineering field
- Create storytelling visualizations that provide sneak peeks into the psyche of many companies as well as working individuals.
- Learn how to develop meaningful visualizations to communicate the data we have to an uninformed audience.
- Here our goal will be to create as many interesting plots as possible; this dataset is more feature-rich, so we should be able to find some interesting things.
- Note: This dataset was taken from job data in India, we hope the trends seen there will to some degree translate.
library(readr)
library(dplyr)
library(ggplot2)
library(plotly)
# Load the salary dataset; read_csv() guesses the column types from the file.
data <- read_csv(
  file = "../data/india-data/Salary_Dataset_with_Extra_Features.csv"
)
## Rows: 22770 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Company Name, Job Title, Location, Employment Status, Job Roles
## dbl (3): Rating, Salary, Salaries Reported
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows to check the columns and their types.
head(data, n = 6L)
## # A tibble: 6 × 8
## Rating `Company Name` `Job Title` Salary `Salaries Reported` Location
## <dbl> <chr> <chr> <dbl> <dbl> <chr>
## 1 3.8 Sasken Android De… 4e5 3 Bangalo…
## 2 4.5 Advanced Millennium Te… Android De… 4e5 3 Bangalo…
## 3 4 Unacademy Android De… 1e6 3 Bangalo…
## 4 3.8 SnapBizz Cloudtech Android De… 3e5 3 Bangalo…
## 5 4.4 Appoids Tech Solutions Android De… 6e5 3 Bangalo…
## 6 4.2 Freelancer Android De… 1e5 3 Bangalo…
## # ℹ 2 more variables: `Employment Status` <chr>, `Job Roles` <chr>
# Remove the extreme-salary outlier: keep only rows whose Salary is below the
# cutoff.
threshold <- 9000000
filtered_data <- data %>%
  filter(Salary < threshold)

# Treat Location as a categorical variable for the plots that follow.
filtered_data$Location <- as.factor(filtered_data$Location)

# Draw a random subsample (at most 1000 rows) for the heavier plots.
# The row positions must be drawn from filtered_data itself: positions drawn
# from nrow(data) can point past the last row of the smaller filtered table
# and therefore not correspond to any real observation.
sample_size <- min(1000, nrow(filtered_data))
sampled_data <- filtered_data[sample(nrow(filtered_data), sample_size), ]
# Scatter plot of Rating (x) against Salary (y) for every row that passed the
# salary filter.
plot <- ggplot(data = filtered_data, mapping = aes(x = Rating, y = Salary)) +
  geom_point() +
  xlab("Rating") +
  ylab("Salary") +
  ggtitle("Correlation of Rating and Job Salary") +
  theme_minimal()
plot
# Heatmap of rating by job role and location, built from the sampled rows.
#
# Several sampled rows can share the same job role / location cell. Drawing
# them directly stacks one tile per row on the same spot, so the visible fill
# is whichever row happens to be drawn last. Average the ratings first so each
# cell is drawn once and shows the mean rating of its rows.
heatmap_data <- sampled_data %>%
  filter(!is.na(Rating), !is.na(Location), !is.na(`Job Roles`)) %>%
  group_by(Location, `Job Roles`) %>%
  summarise(Rating = mean(Rating), .groups = "drop")

plot <- ggplot(
  heatmap_data,
  aes(
    x = `Job Roles`,
    y = Location,
    fill = Rating,
    text = paste(
      "Location: ", Location,
      "<br>Job Role: ", `Job Roles`,
      "<br>Rating: ", round(Rating, 2)
    )
  )
) +
  geom_tile() +
  labs(
    x = "Job Roles",
    y = "Location",
    fill = "Rating",
    title = "Heatmap of Jobs By Location With Ratings"
  ) +
  scale_fill_gradient(low = "white", high = "blue") +
  theme_minimal()

# The `text` aesthetic is not drawn by ggplot2 itself; converting the plot with
# ggplotly() is what turns it into the hover tooltip.
plot <- ggplotly(plot, tooltip = "text")
plot
# Interactive 3D scatter of Rating, Salary and Salaries Reported, coloured by
# Location.
#
# plotly discards rows with missing values in the plotted columns and warns
# about it ("Ignoring N observations"), so keep only complete rows up front.
scatter_cols <- c("Rating", "Salary", "Salaries Reported", "Location")
scatter_data <- sampled_data[complete.cases(sampled_data[, scatter_cols]), ]

# plotly's default qualitative palette (Set2) has only 8 colours, which raises
# RColorBrewer warnings when there are more locations than that. Supply one
# colour per location instead.
location_colors <- grDevices::hcl.colors(
  max(1L, length(unique(scatter_data$Location))),
  palette = "Set 2"
)

plot <- plot_ly(
  scatter_data,
  x = ~Rating,
  y = ~Salary,
  z = ~`Salaries Reported`,
  color = ~Location,
  colors = location_colors,
  type = "scatter3d",
  mode = "markers"
)
plot
## Warning: Ignoring 2 observations
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors